import math
import numpy as np
import sympy as sp
import matplotlib.pyplot as plt
from typing import Callable, List
Within the vast landscape of optimisation algorithms, Gradient Descent (GD) emerges as a stalwart, renowned for its adeptness in the iterative pursuit of local extrema within functions. It is a cornerstone, a linchpin within the fields of machine learning (ML) and deep learning (DL), where it reigns supreme in the art of minimizing cost or loss functions, a task that is pivotal in problems such as linear regression. Yet GD's influence transcends the confines of ML and DL, reaching across an eclectic array of domains:
Optimisation of water treatment processes for pollutant removal and reduced energy consumption.
Design of air quality monitoring systems for precise data collection.
Today, we embark on an intricate journey, delving into the mathematical intricacies, pragmatic implementations, and nuanced behavior that underpin the first-order Gradient Descent algorithm. Unlike conventional ML tutorials that hew to structured datasets, our path veers into the realm of bespoke cost functions, affording us the flexibility to tackle functions of diverse complexities.
Notably, the lineage of this venerable algorithm traces back to 1847, when Augustin-Louis Cauchy first breathed life into its foundations. This historical perspective precedes the advent of contemporary computing, encapsulating a rich heritage. While computer science and numerical methodologies have advanced, Gradient Descent has remained resilient, evolving into a plethora of refined iterations. In our discourse, we adhere to the algorithm's fundamental tenets, implementing it through a classical Python instantiation.
The Gradient Descent algorithm is a powerful optimization tool, but it comes with specific prerequisites. To be eligible for Gradient Descent, a function must meet two critical criteria:
Differentiable
Convex
Differentiability is a fundamental mathematical property that signifies the existence of a derivative for each point within the function's domain. In simple terms, it means that you can calculate the rate at which the function changes at any given point. This property enables Gradient Descent to iteratively navigate the function landscape, adjusting its steps based on the slope or gradient of the function. Functions that are continuous and have well-defined derivatives, like polynomials, trigonometric functions, and most commonly encountered mathematical expressions, readily meet this criterion.
Linear Function: A linear function, such as f(x) = 2x + 3, is differentiable everywhere. Its derivative is a constant, and you can calculate the slope at any point within its domain.
Quadratic Function: Quadratic functions, like f(x) = x^2, are differentiable for all real numbers. The derivative is straightforward to calculate using the power rule: f'(x) = 2x.
Trigonometric Function: Trigonometric functions, such as f(x) = sin(x), are differentiable for all real numbers. The derivatives of trigonometric functions can be derived using trigonometric identities.
Exponential Function: Exponential functions like f(x) = e^x are differentiable everywhere. The derivative of an exponential function is the same as the original function.
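As a quick sanity check, the derivatives quoted above can be verified symbolically. The short sketch below assumes SymPy (imported earlier as sp) and simply prints each example function next to its derivative:
#symbolic check of the derivatives listed above
x_sym = sp.symbols('x')
for expr in (2*x_sym + 3, x_sym**2, sp.sin(x_sym), sp.exp(x_sym)):
    print(expr, '->', sp.diff(expr, x_sym))
#expected output: 2, 2*x, cos(x), exp(x)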
#generate x values
x = np.linspace(-5, 5, 400)
print(x)
[-5.         -4.97493734 -4.94987469 ...  4.94987469  4.97493734
  5.        ]
#functions
#linear equation
def linear_function(x):
    return 2*x + 3
#quadratic equation
def quadratic_function(x):
    return x**2
#trigonometric equation
def trigonometric_function(x):
    return np.sin(x)
#exponential equation
def exponential_function(x):
    return np.exp(x)
#function to plot functions
def multiple_functions_plot(x, functions, labels, colors, titles):
    #length of functions
    num_functions = len(functions)
    fig, axes = plt.subplots(1, num_functions, figsize=(4*num_functions, 4))
    #for loop to iterate through range of num_functions
    for i in range(num_functions):
        ax = axes[i]
        #plotting line plot
        ax.plot(x, functions[i](x), label=labels[i], color=colors[i])
        #title of graph
        ax.set_title(titles[i])
        #label x axis
        ax.set_xlabel('x')
        #label y axis
        ax.set_ylabel('f(x)')
        #grid line
        ax.grid(True, ls='--', alpha=0.5, color='grey')
        #label line plot
        ax.legend()
#list of functions
functions = [linear_function, quadratic_function, trigonometric_function, exponential_function]
#equation of each function
labels = ['f(x) = 2x + 3', 'f(x) = x^2', 'f(x) = sin(x)', 'f(x) = e^x']
#color of each function
colors = ['blue', 'green', 'orange', 'red']
#title of each function
titles = ['Linear Function', 'Quadratic Function', 'Trigonometric Function', 'Exponential Function']
#calling function to plot, multiple function plot
multiple_functions_plot(x, functions, labels, colors, titles)
plt.tight_layout();
What are non-differentiable functions?
Non-differentiable functions are mathematical functions for which a derivative does not exist at certain points or over certain intervals within their domains. In other words, at these specific points or intervals, the rate of change of the function cannot be precisely determined or calculated using traditional differentiation techniques.
Lack of Derivative: At the points or intervals where a function is non-differentiable, there is no well-defined derivative. The derivative represents the rate at which a function changes at a given point, and it is typically calculated as the slope of the tangent line to the curve at that point. When a function is non-differentiable, this tangent line does not exist, and hence, the derivative is undefined.
Discontinuities or Singularities: Non-differentiable functions often exhibit certain characteristics that lead to their lack of differentiability. Common causes include sharp corners, discontinuities, jump discontinuities, and vertical tangents in the graph of the function. These irregularities prevent the calculation of a smooth and continuous derivative.
Special Cases: Some non-differentiable functions are intentionally defined for specific purposes. For example, the absolute value function (|x|) is non-differentiable at x = 0 due to its sharp corner, but it serves as a useful mathematical construct for various applications.
Challenges in Calculus: Dealing with non-differentiable functions can pose challenges in calculus and mathematical analysis. While traditional differentiation rules may not apply, specialized techniques like the use of limits or distribution theory may be employed to handle these functions.
Real-World Significance: Non-differentiable functions can represent real-world phenomena that involve abrupt changes, discontinuities, or singularities. They are encountered in various fields, including physics, engineering, optimization, and signal processing.
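To make the failure of differentiability concrete, here is a minimal numerical sketch (using NumPy, already imported as np): the one-sided difference quotients of |x| at x = 0 settle at -1 from the left and +1 from the right, so no single derivative value exists there.
#one-sided difference quotients of f(x) = |x| at x = 0
h = np.array([0.1, 0.01, 0.001])
left_quotients = (np.abs(0 - h) - np.abs(0)) / (-h)   #tend to -1
right_quotients = (np.abs(0 + h) - np.abs(0)) / h     #tend to +1
print(left_quotients, right_quotients)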
A jump discontinuity, also known as a simple or finite discontinuity, occurs at a point where the function experiences an abrupt change in its value. Specifically, on one side of the point, the function has a different limit than on the other side. This results in a "jump" in the graph of the function when you transition from one side to the other.
Mathematically, for a function f(x), a jump discontinuity occurs at a point c if:
$$\lim_{x \to c^-} f(x) \neq \lim_{x \to c^+} f(x)$$
In simpler terms, as you approach c from the left (x→c-), the function approaches one value, and as you approach c from the right (x→c+), the function approaches a different value. This abrupt change results in a discontinuity in the graph of the function. Strictly speaking, the f(x) = 1/x example plotted below is an infinite discontinuity, since its one-sided limits diverge to -∞ and +∞ rather than settling at finite values, but it illustrates the same mismatch between the two sides of x = 0.
#x values excluding 0
x_left = np.linspace(-5, -0.01, 1000)
x_right = np.linspace(0.01, 5, 1000)
#function f(x) = 1/x
def jump_discontinuity_function(x):
    return 1 / x
#corresponding y values for the function on the left and right sides of 0
y_left = jump_discontinuity_function(x_left)
y_right = jump_discontinuity_function(x_right)
#plot for the jump discontinuity function
plt.figure(figsize=(8, 6))
plt.plot(x_left, y_left, label='f(x) = 1/x (Left Side)', color='blue')
plt.plot(x_right, y_right, label='f(x) = 1/x (Right Side)', color='red')
plt.title('Jump Discontinuity Example')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.axvline(0, color='black', linestyle='--', label='x = 0')
plt.grid(True, ls='--', alpha=0.5, color='grey')
plt.legend();
A cusp, sometimes loosely grouped with corner points under the heading of non-differentiable points, occurs where the function's graph makes a sharp turn. The function itself can be perfectly continuous there; it is the derivative that fails. Unlike a jump discontinuity, where there is an abrupt change in the function's value, a cusp is characterized by the magnitude of the derivative growing without bound as the point is approached.
Mathematically, for a function f(x), a cusp occurs at a point c if:
$$\lim_{x \to c} |f'(x)| = \infty$$
In other words, as you approach c, |f'(x)| becomes infinite, indicating that the function's graph makes a sharp turn at that point.
The closely related case of a corner can be seen in the graph of the absolute value function f(x) = |x| at x = 0. The derivative of |x| is not defined at x = 0 because the one-sided derivatives disagree (-1 from the left, +1 from the right); in a true cusp, the one-sided derivatives themselves diverge to ±∞.
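For contrast, a genuine cusp can be seen in f(x) = |x|^(2/3), a function that is not part of the original set of examples and is included here only as an illustrative sketch: it is continuous at x = 0, but the magnitude of its derivative, (2/3)|x|^(-1/3), grows without bound as x approaches 0.
#generate x values
x = np.linspace(-5, 5, 400)
#cusp example: f(x) = |x|^(2/3)
def cusp_function(x):
    return np.abs(x)**(2/3)
#plot for the cusp example
plt.figure(figsize=(8, 6))
plt.plot(x, cusp_function(x), label='f(x) = |x|^(2/3)', color='purple')
plt.title('Cusp Example')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.grid(True, ls='--', alpha=0.5, color='grey')
plt.legend();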
Absolute Value Function (|x|)
The absolute value function, f(x) = |x|, is non-differentiable at x = 0. At x = 0, the function has a sharp corner, and its derivative is undefined.
#generate x values
x = np.linspace(-5, 5, 400)
#absolute value function
def absolute_value_function(x):
    return np.abs(x)
#corresponding y values for the absolute value function
y_absolute = absolute_value_function(x)
#plot for the absolute value function
plt.figure(figsize=(8, 6))
plt.plot(x, y_absolute, label='f(x) = |x|', color='blue')
plt.title('Absolute Value Function')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.grid(True, ls='--', alpha=0.5, color='grey')
plt.legend();
The step function, often denoted as H(x) or u(x), is non-differentiable at x = 0. It abruptly changes from 0 to 1 at this point, leading to an undefined derivative.
#generate x values
x = np.linspace(-5, 5, 400)
#heaviside step function
def heaviside_function(x):
    return np.heaviside(x, 1)
#corresponding y values for the Heaviside function
y_heaviside = heaviside_function(x)
#plot for the Heaviside step function
plt.figure(figsize=(8, 6))
plt.plot(x, y_heaviside, label='H(x)', color='green')
plt.title('Heaviside Step Function')
plt.xlabel('x')
plt.ylabel('H(x)')
plt.grid(True)
plt.legend();
The sign function, sgn(x), returns -1 for x < 0, 1 for x > 0, and 0 for x = 0. It is non-differentiable at x = 0 because it lacks a well-defined derivative at this point.
#generate x values
x = np.linspace(-5, 5, 400)
#sign function
def sign_function(x):
    return np.sign(x)
#corresponding y values for the sign function
y_sign = sign_function(x)
#plot for the sign function
plt.figure(figsize=(8, 6))
plt.plot(x, y_sign, label='sgn(x)', color='orange')
plt.title('Sign Function')
plt.xlabel('x')
plt.ylabel('sgn(x)')
plt.grid(True)
plt.legend();
The Dirac delta is a mathematical construct used in distribution theory and signal processing. It is not a function in the classical sense: it is defined through its integral properties rather than by pointwise values, and it has no classical derivative. Below we plot a narrow Gaussian as a conventional approximation.
#generate x values
x = np.linspace(-5, 5, 1000)
#the approximation of the Dirac delta function using a narrow Gaussian
def dirac_delta_approximation(x, sigma=0.1):
    return (1 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * (x / sigma)**2)
#corresponding y values for the approximation
y_dirac_approximation = dirac_delta_approximation(x)
#plot for the approximation of the Dirac delta function
plt.figure(figsize=(8, 6))
plt.plot(x, y_dirac_approximation, label='Approximation of δ(x)', color='blue')
plt.title('Approximation of Dirac Delta Function')
plt.xlabel('x')
plt.ylabel('Approximation')
plt.grid(True, ls='--', alpha=0.5, color='grey')
plt.legend();
Functions that are defined differently in different intervals can be non-differentiable at points where these intervals meet. For example, consider a piecewise function that is defined as f(x) = x^2 for x < 0 and f(x) = |x| for x ≥ 0. This function is non-differentiable at x = 0 where the two pieces meet.
#generate x values
x = np.linspace(-5, 5, 1000)
#piecewise function
def piecewise_function(x):
    #an array to store the results
    result = np.zeros_like(x)
    #first segment: f(x) = x^2 for x < 0
    condition1 = x < 0
    result[condition1] = x[condition1]**2
    #second segment: f(x) = |x| for x >= 0
    condition2 = x >= 0
    result[condition2] = np.abs(x[condition2])
    return result
#corresponding y values for the piecewise function
y_piecewise = piecewise_function(x)
#plot for the piecewise function
plt.figure(figsize=(8, 6))
plt.plot(x, y_piecewise, label='Piecewise Function', color='blue')
plt.title('Piecewise Function Example')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.grid(True)
plt.legend();
In the context of univariate functions, the concept of convexity simplifies to a fundamental rule: when you connect any two points on the function's curve with a straight line segment, that segment should either touch the curve or consistently stay above it. Put differently, it should never cut through the curve. This condition guarantees that any local minimum is also a global minimum.
Formally, for any two points x₁ and x₂ in the domain and any λ between 0 and 1:
$$f(\lambda x_1 + (1-\lambda)x_2) \leq \lambda f(x_1) + (1-\lambda)f(x_2)$$
Here, λ represents a point's position along the line connecting the two points, ranging from 0 (at the left point) to 1 (at the right point). When λ equals 0.5, it signifies the midpoint.
This requirement keeps the graph free of dents: the chord between any two points never falls below the curve, so the function cannot hide additional local minima between them.
Convexity is a geometric property that characterizes the shape of the function's graph. A function is considered convex if, for any two points within its domain, the line segment connecting those two points lies on or above the graph of the function. In other words, it is like a bowl-shaped or U-shaped curve with a single (global) minimum that Gradient Descent can reach.
A convex function is a type of mathematical function that exhibits a specific geometric property.
Line Segment Property: For any two points within the function's domain, the line segment connecting those two points lies entirely above or on the graph of the function. In other words, if you select any two points on the curve of a convex function and draw a straight line between them, that line will always stay above or touch the curve.
Single Global Minimum: For a convex function, every local minimum is also a global minimum, and a strictly convex function has at most one such point. This global minimum represents the lowest value that the function can attain within its domain.
U-Shaped or Bowl-Shaped Curve: Visually, convex functions often resemble a "U" or a "bowl." The curve slopes upward on both sides of the global minimum, forming a shape that opens upward.
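As a quick numerical illustration of the line segment property, the sketch below (an added example, not part of the original code) checks the convexity inequality for f(x) = x² at several points along the chord between x₁ = -3 and x₂ = 4:
#check f(lam*x1 + (1-lam)*x2) <= lam*f(x1) + (1-lam)*f(x2) for f(x) = x^2
x1, x2 = -3.0, 4.0
for lam in np.linspace(0, 1, 5):
    curve_value = (lam * x1 + (1 - lam) * x2)**2      #function value on the curve
    chord_value = lam * x1**2 + (1 - lam) * x2**2     #corresponding value on the chord
    print(f"lambda={lam:.2f}: curve={curve_value:.2f} <= chord={chord_value:.2f} -> {curve_value <= chord_value}")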
#generate x values
x = np.linspace(-5, 5, 400)
#define the convex function
def convex_function(x):
    return x**2
#corresponding y values for the convex function
y_convex = convex_function(x)
#plot convex function
plt.figure(figsize=(8, 6))
plt.plot(x, y_convex, label='f(x) = x^2', color='green')
plt.title('Convex Function')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.grid(True)
plt.legend();
A non-convex function is a function that lacks the convexity property. This means that for some pairs of points within its domain, the line segment connecting those points may cross below the graph of the function.
Multiple Local Minima and Maxima: Non-convex functions can have multiple local minima (points where the function reaches a local low) and local maxima (points where the function reaches a local high) within their domains.
Complex and Varied Shapes: The graph of a non-convex function can exhibit complex and varied shapes. It may include regions of increasing and decreasing values, as well as flat or plateau-like regions.
Challenges in Optimization: Non-convex functions pose challenges for optimization algorithms because they do not guarantee convergence to the global minimum. Optimization algorithms may get stuck in local minima or experience convergence issues in the presence of non-convexity.
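A single counterexample is enough to confirm non-convexity. The arithmetic sketch below (added for illustration) shows the convexity inequality failing for f(x) = x³ with x₁ = -2, x₂ = 1 and λ = 0.5:
#convexity inequality fails for f(x) = x^3
x1, x2, lam = -2.0, 1.0, 0.5
midpoint = lam * x1 + (1 - lam) * x2            #-0.5
curve_value = midpoint**3                       #f(-0.5) = -0.125
chord_value = lam * x1**3 + (1 - lam) * x2**3   #(-8 + 1) / 2 = -3.5
print(curve_value <= chord_value)               #False: the chord lies below the curve here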
#generate x values
x = np.linspace(-5, 5, 400)
#non-convex function
def non_convex_function(x):
    return x**3
#corresponding y values for the non-convex function
y_non_convex = non_convex_function(x)
#plot for the non-convex function
plt.figure(figsize=(8, 6))
plt.plot(x, y_non_convex, label='f(x) = x^3', color='red')
plt.title('Non-Convex Function')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.grid(True)
plt.legend();
When exploring the convexity of a univariate function, there's a mathematical test that simplifies the process: examining the second derivative. By calculating the second derivative of the function and ensuring that its value is consistently greater than 0 across the entire domain, we can confidently conclude that the function is convex.
In simpler terms, this test examines the curvature of the function. If the curvature is consistently positive throughout the entire range of x, it confirms that the function is indeed convex. This approach provides a straightforward way to assess convexity, allowing us to make informed decisions about optimisation and analysis.
A function f is convex if, for any two points x₁ and x₂, the line segment connecting (x₁, f(x₁)) and (x₂, f(x₂)) lies above the graph of f.
Mathematical Expression of Convexity:
The mathematical expression is given by:
$$f(\lambda x_1 + (1-\lambda)x_2) \leq \lambda f(x_1) + (1-\lambda)f(x_2)$$
Here, λ is a number between 0 and 1 and represents a point on the line segment connecting x₁ and x₂.
Second Derivative Test for Convexity:
If the second derivative d²f/dx² is greater than 0 for all x in the domain of f, then f is convex.
Convex Function - Quadratic Function:
Consider $$f(x) = x^2 - x + 3.$$
First and Second Derivatives
First derivative: df/dx = 2x - 1
Second derivative: d²f/dx² = 2
Convexity Conclusion
Since d²f/dx² = 2 > 0, the function f(x) = x² - x + 3 is strictly convex.
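The same conclusion can be reached symbolically; a minimal SymPy sketch (using sp, imported earlier):
#second derivative test for f(x) = x^2 - x + 3
x_sym = sp.symbols('x')
f_expr = x_sym**2 - x_sym + 3
print(sp.diff(f_expr, x_sym))        #2*x - 1
print(sp.diff(f_expr, x_sym, 2))     #2, positive for all x, so f is convex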
Quasi-Convex Function - Quartic Function:
Consider $$f(x) = x^4 - 2x^3 + 2.$$
First and Second Derivatives
First derivative: df/dx = 4x³ - 6x²
Second derivative: d²f/dx² = 12x² - 12x
Identifying Saddle Points
Saddle points are identified where the first derivative equals zero but the second derivative does not indicate a maximum or minimum.
Analysis of Convexity/Concavity
For x < 0, the function is convex (d²f/dx² > 0).
For 0 < x < 1, the function is concave (d²f/dx² < 0).
For x > 1, the function is convex again (d²f/dx² > 0).
Saddle Point Conclusion
The point x = 0 is a saddle point: f'(0) = 0 and f''(0) = 0, with the second derivative changing sign there.
The point x = 1.5 is a local minimum: f'(1.5) = 0 and f''(1.5) = 9 > 0.
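These conclusions can be verified symbolically; the sketch below solves f'(x) = 0 and evaluates the second derivative at each critical point (again using sp from the imports above):
#critical points of f(x) = x^4 - 2x^3 + 2
x_sym = sp.symbols('x', real=True)
f_expr = x_sym**4 - 2*x_sym**3 + 2
f_prime_expr = sp.diff(f_expr, x_sym)
f_double_prime_expr = sp.diff(f_expr, x_sym, 2)
for critical_point in sp.solve(f_prime_expr, x_sym):
    print(critical_point, f_double_prime_expr.subs(x_sym, critical_point))
#x = 0 gives f''(0) = 0 (saddle point); x = 3/2 gives f''(3/2) = 9 > 0 (local minimum)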
Multivariate functions are mathematical constructs that depend on multiple variables. Unlike single-variable functions, which map a single input to a single output, multivariate functions take several inputs and produce an output. They find applications in various fields, including physics, engineering, economics, and machine learning, where systems are influenced by multiple factors.
When analysing multivariate functions, our primary goal is to identify critical points, which are points where the derivative(s) of the function are either zero or undefined. Critical points can be broadly categorised into three types: maxima, minima, and the intriguing saddle points.
Saddle points are a distinctive and complex phenomenon within multivariate functions. Unlike maxima and minima, saddle points exhibit unique behavior, making their identification a more intricate task. To detect saddle points, we often rely on the Hessian matrix—a square matrix containing second partial derivatives of the multivariate function.
$$f(x, y) = x^2 - y^2$$
$$f(0, 0) = 0^2 - 0^2 = 0$$
Examining Points along the X-axis
If we examine points along the x-axis (y = 0), the function reduces to f(x, 0) = x², so the function values increase as we move away from the origin in either direction.
Investigating Points along the Y-axis
Conversely, when we shift our focus to points along the y-axis (x = 0), the function reduces to f(0, y) = -y², so the function values decrease as we move away from the origin in either direction.
This intriguing behavior exemplifies the saddle point characteristic. A saddle point is neither a maximum nor a minimum; instead, it's a point where the function exhibits a unique behavior. In some directions, the function curves upward, while in others, it curves downward.
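A short SymPy sketch makes the Hessian test concrete for this example: the Hessian of f(x, y) = x² - y² has one positive and one negative eigenvalue, which is exactly the signature of a saddle point at the origin.
#Hessian of f(x, y) = x^2 - y^2
x_sym, y_sym = sp.symbols('x y')
f_expr = x_sym**2 - y_sym**2
hess = sp.hessian(f_expr, (x_sym, y_sym))
print(hess)                 #Matrix([[2, 0], [0, -2]])
print(hess.eigenvals())     #{2: 1, -2: 1}: mixed signs, so (0, 0) is a saddle point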
#quasi-convex function, its first and second derivatives
def f(x):
    return x**4 - 2*x**3 + 2
def f_prime(x):
    return 4*x**3 - 6*x**2
def f_double_prime(x):
    return 12*x**2 - 12*x
#range of x values
x = np.linspace(-1, 3, 400)
#function values and its derivatives for the new range of x
y = f(x)
y_prime = f_prime(x)
y_double_prime = f_double_prime(x)
# function and its first and second derivatives
plt.figure(figsize=(10, 6))
plt.plot(x, y, label='$f(x) = x^4 - 2x^3 + 2$', color='blue')
plt.plot(x, y_prime, label="$f'(x) = 4x^3 - 6x^2$", color='green')
plt.plot(x, y_double_prime, label="$f''(x) = 12x^2 - 12x$", color='red')
#saddle point and minimum
#saddle point at x=0
plt.scatter(0, f(0), color='black', zorder=5)
plt.annotate('Saddle Point', (0, f(0)), textcoords="offset points", xytext=(0,10), ha='center')
#minimum at x=1.5 (We know this from the previous mathematical analysis)
plt.scatter(1.5, f(1.5), color='black', zorder=5)
plt.annotate('Minimum', (1.5, f(1.5)), textcoords="offset points", xytext=(0,10), ha='center')
plt.legend()
plt.grid(True)
plt.xlabel('x')
plt.ylabel('f(x)')
plt.title('Analysis of the Quasi-Convex Function with Saddle Point and Minimum Annotated')
#adjusted the y-axis and x-axis
plt.ylim(-3, 3)
plt.xlim(-1, 3);
The plot above shows the function
$$f(x) = x^4 - 2x^3 + 2$$
accompanied by its first derivative:
$$f'(x) = 4x^3 - 6x^2$$
and its second derivative:
$$f''(x) = 12x^2 - 12x.$$
We can spot a critical point on the graph where the first derivative touches zero. This point sits at x = 0, and the second derivative is also zero there, changing sign from positive to negative as x increases through 0. This tells us that we are dealing with a saddle point, not a minimum.
Upon scrutinizing the second derivative, we discern a compelling pattern:
When x < 0, the function displays convex behavior.
Between 0 < x < 1, it takes on a concave nature.
Beyond x > 1, it returns to being convex.
This analysis aligns seamlessly with the provided information.
The visual representation confirms the presence of a saddle point at x = 0, evident from the second derivative vanishing and changing sign there, which flips the function's concavity.
The second critical point, at x = 1.5, also lies within the plotted range: the first derivative crosses zero there while the second derivative is positive, which identifies it as the local minimum annotated on the graph.
Understanding saddle points enriches our comprehension of function behavior.
#function with the second derivative analysis
plt.figure(figsize=(10, 6))
#function and its derivatives
plt.plot(x, y, label='$f(x) = x^4 - 2x^3 + 2$', color='blue')
plt.plot(x, y_prime, label="$f'(x) = 4x^3 - 6x^2$", color='green')
plt.plot(x, y_double_prime, label="$f''(x) = 12x^2 - 12x$", color='red')
#saddle point at x=0
plt.scatter(0, f(0), color='black', zorder=5)
plt.annotate('Saddle Point\n(f\'\'(x)=0)', (0, f(0)), textcoords="offset points", xytext=(-15,-30), ha='center')
#minimum at x=1.5
plt.scatter(1.5, f(1.5), color='black', zorder=5)
plt.annotate('Minimum\n(f\'(x)=0, f\'\'(x)>0)', (1.5, f(1.5)), textcoords="offset points", xytext=(0,10), ha='center')
#regions of concavity and convexity
plt.annotate('Convex Region\n(f\'\'(x)>0)', (2, f(2)), textcoords="offset points", xytext=(0,10), ha='center')
plt.annotate('Concave Region\n(f\'\'(x)<0)', (0.5, f(0.5)), textcoords="offset points", xytext=(0,10), ha='center')
plt.legend()
plt.grid(True)
plt.xlabel('x')
plt.ylabel('f(x)')
plt.title('Quasi-Convex Function')
plt.ylim(-3, 3)
plt.xlim(-1, 3);
The saddle point at x = 0 is unmistakably highlighted. At this point, the first derivative (f'(x)) is zero, and similarly, the second derivative (f''(x)) is also zero—typical characteristics of a saddle point.
We've also pinpointed the minimum point at x = 1.5. Here, the first derivative (f'(x)) equals zero, but crucially, the second derivative (f''(x)) is positive. This positivity of the second derivative signifies the presence of a local minimum.
The concave region is duly noted, existing where the second derivative (f''(x)) is less than zero—spanning from x = 0 to x = 1.
On the flip side, the convex regions are also distinctly marked. They manifest where the second derivative (f''(x)) takes on positive values, characterizing intervals where x is less than zero and greater than one.
Understanding the Gradient: The gradient is a vector that combines the partial derivatives of a function with respect to each of its variables. For a function f(x,y), the gradient would be a two-dimensional vector consisting of the partial derivative with respect to x and the partial derivative with respect to y.
Calculating Partial Derivatives: The partial derivative of a function with respect to one of its variables is calculated by differentiating the function with respect to that variable while treating all other variables as constants.
Forming the Gradient Vector: Once the partial derivatives are found, they are arranged into a vector. This vector is the gradient.
Interpreting the Gradient: At any given point, the gradient vector points in the direction of the steepest ascent. Its magnitude gives the rate of increase in that direction.
#symbols
x, y = sp.symbols('x y')
#function
f = 0.5 * x**2 + y**2
#gradient
gradient = sp.Matrix([sp.diff(f, var) for var in (x, y)])
#gradient at a specific point
gradient_at_point = gradient.subs({x: 10, y: 10})
#output
print(gradient_at_point)
Matrix([[10.0000000000000], [20]])
#function and its gradient
def f(x, y):
    return 0.5 * x**2 + y**2
def gradient(x, y):
    df_dx = x
    df_dy = 2 * y
    return np.array([df_dx, df_dy])
#gradient at a specific point
point_x = 10
point_y = 10
grad_at_point = gradient(point_x, point_y)
print(grad_at_point)
[10 20]
This result confirms our hand calculation, indicating that at the point (10, 10), the slope of the curve is steeper in the y-direction (with a value of 20) than in the x-direction (with a value of 10). This vector points in the direction of the steepest ascent from the point (10, 10) on the surface defined by the function.
The Gradient Descent Algorithm is a cornerstone of machine learning, providing a mechanism to find the minimum of a function efficiently.
Imagine standing in a valley surrounded by mountains. Your goal is to find the lowest point. You can’t see the bottom directly, so you feel the ground to discern the slope and take steps downwards. This is akin to what Gradient Descent does in a multidimensional landscape of data.
Initialisation: The journey begins with a guess, known as the initialisation. Think of it as choosing a random spot to start our descent in the valley.
Direction of Descent: To find our way down, we calculate the gradient at our current location. The gradient is like the steepness of the slope under our feet, pointing uphill. Since we want to go down, we’ll move in the opposite direction.
The Learning Rate: We don't just rush downhill; we take measured steps, sized according to a parameter called the learning rate (denoted by η). This is crucial: too large a step (a high learning rate), and we might overshoot the valley's bottom; too small (a low learning rate), and our journey is tediously slow, risking getting stuck if the terrain flattens prematurely.
The Descent: Equipped with the direction and size of our step, we move. This step is the heart of the algorithm: a new point is calculated by subtracting the scaled gradient from our current position.
Convergence Criteria: We repeat this process, stepping down the slope iteratively. We stop when we’ve either taken a predetermined number of steps (iterations) or our steps become so small that we’re not moving significantly (determined by a threshold called tolerance).
In practical terms, Gradient Descent allows us to adjust the parameters of our model, be it linear regression or a neural network, to minimize the cost function—a measure of how wrong our model's predictions are. By iterating over the gradients, we tweak these parameters to steadily improve our model's accuracy.
Mathematically, the update rule for the algorithm can be expressed as:
$$p_{n+1} = p_n - \eta \nabla F(p_n)$$
Here, pₙ is the current position, ∇F(pₙ) is the gradient of the function at pₙ, and η is the learning rate. This equation is the iterative step that nudges our parameters in the direction that reduces the function’s value.
The choice of the learning rate is a delicate balance and often requires tuning. A rate that's too high may cause the algorithm to diverge, while one that's too low may result in a long convergence time or getting stuck in a local minimum. Therefore, it’s not just about going downhill; it’s about doing so efficiently and effectively, much like an experienced hiker finding the best path to the valley floor.
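A tiny sketch makes the divergence case tangible (this snippet is illustrative only and separate from the implementation that follows): for f(x) = x² the update is x_new = (1 - 2η)·x, so any η greater than 1 makes every step larger than the last.
#a few updates on f(x) = x^2 with an oversized learning rate
x_val, eta = 10.0, 1.1
for _ in range(5):
    x_val = x_val - eta * (2 * x_val)   #x <- x - eta * f'(x)
    print(x_val)
#each step multiplies x by (1 - 2*eta) = -1.2, so the magnitude grows: roughly -12, 14.4, -17.3, 20.7, -24.9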
#gradient_descent function parameters
# - start: The initial value of x for the optimization.
# - gradient: The gradient (derivative) function of the objective function.
# - learn_rate: The learning rate, controlling the step size in each iteration.
# - max_iter: The maximum number of iterations to prevent infinite loops.
# - tol: A tolerance value for stopping criteria (optional, defaults to 0.01).
def gradient_descent(start: float, gradient: Callable[[float], float],
                     learn_rate: float, max_iter: int, tol: float = 0.01) -> List[float]:
    #initialise x with the starting value
    x = start
    #list to track the history of x values
    steps = [start]
    #iterate for a maximum of 'max_iter' times
    for _ in range(max_iter):
        #gradient adjusted by the learning rate
        diff = learn_rate * gradient(x)
        #stop once the absolute change is smaller than the tolerance
        if np.abs(diff) < tol:
            break
        #update x with the new value
        x = x - diff
        #append the new x value to the history list
        steps.append(x)
    #return the list of x values during the optimisation
    return steps
#gradient of the objective f(x) = x^2
def gradient(x):
    return 2 * x
#initial values and parameters for the optimisation
#starting value of x
start = 10.0
#learning rate (controls step size)
learn_rate = 0.1
#maximum number of iterations
max_iter = 50
#tolerance for stopping criteria (optional)
tol = 0.01
#gradient_descent
steps = gradient_descent(start, gradient, learn_rate, max_iter, tol)
#final result
final_x = steps[-1]
print(f"Final result: x = {final_x}")
#plot gradient descent
plt.figure(figsize=(10, 6))
plt.plot(range(len(steps)), steps, marker='o', linestyle='-', color='b')
plt.title('Gradient Descent Steps')
plt.xlabel('Iteration')
plt.ylabel('Value of x')
plt.grid(True);
Final result: x = 0.04722366482869646
Start with the initial value.
Calculate the gradient of the objective function at the current value.
Multiply the gradient by the learning rate to determine the step size.
Update the current value by subtracting the step size.
Repeat the above steps until one of the stopping criteria is met: the maximum number of iterations is reached, or the change in the variable is smaller than the specified tolerance.
Starting Point [float]: The process begins with selecting an initial value for the variable you want to optimize. This starting point serves as the initial guess for the optimal value.
Gradient Function [object]: Before applying gradient descent, you must define a gradient function. This function calculates the gradient (derivative) of the objective function with respect to the variable you're optimizing. The gradient points in the direction of the steepest increase in the objective function.
Learning Rate [float]: The learning rate is a hyperparameter that determines the step size in each iteration of gradient descent. It scales the gradient, influencing how far you move in the direction of the gradient.
Maximum Number of Iterations [int]: To prevent infinite loops, you set a maximum number of iterations. Gradient descent will repeatedly update the variable until it converges or until the maximum number of iterations is reached.
Tolerance [float] (Optional): You can specify a tolerance level, which determines when the optimization should stop. If the absolute change in the variable between two consecutive iterations is smaller than the tolerance, the algorithm is considered to have converged, and it stops. The default tolerance is often set to a small value like 0.01.
Consider the quadratic function f(x) = x^2 - 4x + 1. Our objective is to find the value of x that minimizes this function. To achieve this, we employ the Gradient Descent algorithm. This iterative method updates the value of x by moving in the direction opposite to the gradient (the derivative) of the function at the current x.
Gradient Calculation: The initial step involves computing the gradient of f(x). The gradient here is the derivative of f(x) with respect to x, which, for our function, is f'(x) = 2x - 4.
Update Rule: The update rule in Gradient Descent is defined as x_new = x_old - η * f'(x_old), where η represents the learning rate.
Learning Rate: The learning rate η is a crucial parameter that influences the step size towards the minimum. If η is small, the algorithm converges slowly, whereas a large η might lead to overshooting or even divergence from the minimum.
Iteration: The process starts from an initial guess of x and applies the update rule repeatedly. This iteration continues until the algorithm meets the convergence criteria, which could be a sufficiently small change between iterations or reaching a predetermined number of iterations.
A smaller learning rate (e.g., 0.1) takes more cautious steps towards the minimum, which is good for precision but may require more iterations. A larger learning rate (e.g., 0.8) takes larger steps, which might oscillate around the minimum before converging, or potentially even diverge if the learning rate is too large.
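For instance, starting from x₀ = 9 with η = 0.1 (the same starting point used in the code below), the first two updates work out as:
$$x_1 = 9 - 0.1(2 \cdot 9 - 4) = 9 - 1.4 = 7.6$$
$$x_2 = 7.6 - 0.1(2 \cdot 7.6 - 4) = 7.6 - 1.12 = 6.48$$
Each update shrinks the distance to the minimum at x = 2 by a factor of 1 - 2η = 0.8, so convergence is geometric but gradual at this learning rate.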
from typing import Callable, List
#function and its gradient
def func(x):
    return x**2 - 4*x + 1
def gradient(x):
    return 2 * x - 4
#gradient_descent
def gradient_descent(gradient, start, learn_rate, max_iter, tol):
    x = start
    steps = [start]
    for _ in range(max_iter):
        diff = learn_rate * gradient(x)
        if np.abs(diff) < tol:
            break
        x = x - diff
        steps.append(x)
    return x, steps
#gradient_descent curve plot
def gradient_descent_curve(ax, learning_rate):
    #gradient descent history
    final_x, history = gradient_descent(gradient, initial_x, learning_rate, iterations, tolerance)
    #plot the function
    ax.plot(x_values, y_values, label='Function: $f(x) = x^2 - 4x + 1$', lw=2)
    #plot the gradient descent steps
    ax.scatter(history, [func(x) for x in history], color='red', zorder=5, label='GD Steps')
    ax.plot(history, [func(x) for x in history], 'r--', lw=1, zorder=5)
    ax.annotate('Start', xy=(history[0], func(history[0])), xytext=(history[0], func(history[0]) + 10),
                arrowprops=dict(facecolor='black', shrink=0.05), ha='center')
    ax.annotate('End', xy=(final_x, func(final_x)), xytext=(final_x, func(final_x) + 10),
                arrowprops=dict(facecolor='black', shrink=0.05), ha='center')
    ax.set_title(f'Learning Rate: {learning_rate}')
    ax.set_xlabel('x value')
    ax.set_ylabel('f(x)')
    ax.grid(True, alpha=0.5, ls='--', color='grey')
    ax.legend()
fig, axs = plt.subplots(2, 2, figsize=(14, 10))
#x values for the function and their corresponding y values
x_values = np.linspace(-2, 11, 400)
y_values = func(x_values)
#parameters
initial_x = 9
iterations = 100
tolerance = 1e-6
learning_rates = [0.1, 0.3, 0.8, 0.9]
#plot learning rates
for ax, lr in zip(axs.flatten(), learning_rates):
    gradient_descent_curve(ax, lr)
plt.tight_layout();
In this exploration, we have journeyed through the world of Gradient Descent (GD), an optimisation algorithm that stands at the heart of many machine learning and engineering applications. We traced its origins back to the 19th century and appreciated its enduring relevance in today's computational landscape.
We established the critical role of differentiability in GD, defining it as the algorithm's compass, guiding the iterative journey towards local extrema. The nuances of convexity were examined, elucidating how this property guarantees the finding of global minima, a feature that GD capitalises on in its quest for optimisation.
Our journey through the algorithm's mechanics revealed the significance of the learning rate, a pivotal parameter that dictates the convergence efficacy. We articulated how the gradient serves as the stepping stone for GD, with each iteration a calculated move informed by the slope of the cost function.
In practice, we saw GD in action through a Python-coded excursion, applying it to a quadratic function. This practical demonstration not only grounded our theoretical insights but also highlighted the algorithm's adaptability to different functions and its sensitivity to various learning rates.
In closing, we reflected on the challenges posed by non-convex functions, such as the presence of saddle points, and acknowledged the ongoing research and advancements in optimisation algorithms that aim to tackle these hurdles.